## Warning: replacing previous import 'GenomicRanges::shift' by 'data.table::shift'
## when loading 'plgINS'

enrichMiR

#' getTS
#'
#' @param species character object. Can be "human", "mouse" or "rat"
#'
#' @return TargetScan miRNA target dataframe with family information in metadata()
#'
getTS <- function(species=c("human","mouse","rat")){
  library(S4Vectors)
  species <- match.arg(species)
  
  # assign species ID
  spec <- switch( species,
                  human = 9606,
                  mouse = 10090,
                  rat = 10116 )
  
  
  # download TargetScan miRNA targeting dataset
  tmp <- tempfile()
  download.file(
    "http://www.targetscan.org/vert_72/vert_72_data_download/Summary_Counts.all_predictions.txt.zip", 
    tmp)
  unzip(tmp)
  full <- fread("Summary_Counts.all_predictions.txt") #, sep = "\t", header = TRUE)
  
  # limit to selected species
  sub <- full[full$'Species ID' == spec,]
  sub$score <- as.numeric(as.character(sub$'Cumulative weighted context++ score'))
    
  # generate TargetScan dataframe
  ts <- DataFrame(family = sub$'miRNA family',
                   rep.miRNA = sub$'Representative miRNA',
                   feature = sub$'Gene Symbol',
                   sites = sub$'Total num conserved sites',
                   score = as.numeric(as.character(sub$'Cumulative weighted context++ score'))
                   )
  ts <- DataFrame(
    aggregate(sub[,c("sites","score")], by=ts[,c("family","feature")], FUN=mean)
    )
  

  
  # download TargetScan miRNA families dataset
  tmp <- tempfile()
  download.file(
    "http://www.targetscan.org/vert_72/vert_72_data_download/miR_Family_Info.txt.zip", 
    tmp)
  unzip(tmp)
  full <- fread("miR_Family_Info.txt") #, sep = "\t", header = TRUE)
  
  # limit to selected species
  sub <- full[full$'Species ID' == spec,]
  
  fam <- sub$`Seed+m8`
  names(fam) <- sub$`MiRBase ID`
  
  # add family info to ts dataframe as attribute
  metadata(ts)$families <- fam
  
  # enrichMiR cant handle 0 values for sites feature
  return(ts[ts$sites!=0,])
}
tests <- c("areamir","overlap","michael","KS","KS2","MW")
# all tests except WO:
tests <- c("overlap","michael","mw","ks","ks2","areamir","areamir2")
# all tests
tests <- c("overlap","michael","wo","mw","ks","ks2","gsea","gseamod","modscore","modsites","areamir","areamir2")
tests <- NULL
cores <- 8

# run enrichMiR on all objects of dea.list (high runtime!)

## foreach: doesnt work
library(foreach)
library(doParallel)
# cl <- makeCluster(cores)
# registerDoParallel(cl, cores=cores)
# chunk.size <- length(dea.names)/cores
# test <- foreach(c=1:cores, .combine="rbind") %dopar% {
#   for(i in dea.names[((c-1)*chunk.size+1):(c*chunk.size)]){
#     lapply(names(props.all), FUN=function(j){
#       enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr, 
#                 cleanNames=TRUE, tests=tests)
#       })
#     }
# }
#stopImplicitCluster()
# stopCluster(cl)

## foreach: Pierre's code (works, but 1 core)
res <- foreach( dea=dea.list, 
                TS=unlist(TS.list,recursive=FALSE) ) %dopar% {
                  enrichMiR( as.data.frame(dea), TS=TS, miRNA.expression=mirexpr, 
                             cleanNames=TRUE, tests=tests )
                }

## mclapply (works, but 1 core; all cores when reduced tests var)
## looks like GSEA, WO, mods are problematic
library(parallel)
# multicores on Linux
e.list <- mclapply(dea.names, FUN=function(i){
  lapply(names(props.all), FUN=function(j){
      enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr, 
                cleanNames=TRUE, tests=tests)
      })
  }, mc.cores = cores )
## mcmapply: inspired by Pierre's code
res <- mcmapply(dea=dea.list, 
                TS=unlist(TS.list,recursive=FALSE), 
                FUN=function(x){
                  enrichMiR( as.data.frame(dea), TS=TS, miRNA.expression=mirexpr, 
                             cleanNames=TRUE, tests=tests )
                  }, mc.cores = cores )


## bplapply: doesnt work at all
e.list <- bplapply(dea.names[1:4], FUN=function(i){
  lapply(names(props.all), FUN=function(j){
      enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr,
                cleanNames=TRUE, tests=tests)
      })
  }, BPPARAM = MulticoreParam(cores, progressbar = TRUE) )


## bpmapply: Pierre's code
res <- bpmapply(dea=dea.list, 
                TS=unlist(TS.list,recursive=FALSE), 
                BPPARAM=MulticoreParam(4, threshold="TRACE"), FUN=function(x){
                  enrichMiR( as.data.frame(dea), TS=TS, miRNA.expression=mirexpr, cleanNames=TRUE, tests=tests )
                  } )


## serial run
e.list <- lapply(dea.names, FUN=function(i){
  lapply(names(props.all), FUN=function(j){
      enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr, 
                cleanNames=TRUE, tests=tests)
      })
  })


# naming
names(e.list) <- dea.names
for(i in dea.names){
  names(e.list[[i]]) <- names(props.all)
}

# use this between parallel runs
gc(verbose = T)

Benchmarking

Plots

## Score analysis:  detPPV

## Score analysis:  FP.atFDR05

## Score analysis:  log10QDiff

## Score analysis:  TP.atFDR05

scores over the datasets

## $`let-7a`
## original       20       30       40       50 
##    0.768    0.768    0.768    0.768    0.645 
## 
## $`miR-1`
## original       20       30       40       50 
##    0.801    0.801    0.801    0.801    0.801 
## 
## $`miR-124`
## original       20       30       40       50 
##    0.801    0.801    0.779    0.539    0.385 
## 
## $`miR-137`
## original       20       30       40       50 
##    0.768    0.768    0.768    0.768    0.601 
## 
## $`miR-139`
## original       20       30       40       50 
##    0.411    0.356    0.229    0.260    0.094 
## 
## $`miR-143`
## original       20       30       40       50 
##    0.824    0.824    0.825    0.824    0.824 
## 
## $`miR-144`
## original       20       30       40       50 
##    0.825    0.641    0.426    0.296    0.211 
## 
## $`miR-153`
## original       20       30       40       50 
##    0.768    0.745    0.568    0.434    0.393 
## 
## $`miR-155`
## original       20       30       40       50 
##    0.795    0.782    0.778    0.781    0.777 
## 
## $`miR-182`
## original       20       30       40       50 
##    0.768    0.439    0.403    0.380    0.342 
## 
## $`miR-199a`
## original       20       30       40       50 
##    0.824    0.783    0.781    0.731    0.602 
## 
## $`miR-204`
## original       20       30       40       50 
##    0.795    0.779    0.779    0.745    0.705 
## 
## $`miR-205`
## original       20       30       40       50 
##    0.824    0.773    0.769    0.690    0.604 
## 
## $`miR-216b`
## original       20       30       40       50 
##    0.712    0.475    0.389    0.174    0.089 
## 
## $`miR-223`
## original       20       30       40       50 
##    0.824    0.824    0.824    0.815    0.759 
## 
## $`miR-7`
## original       20       30       40       50 
##    0.824    0.811    0.805    0.798    0.809

scores over methods

## $aREAmir
## original       20       30       40       50 
##    0.969    0.849    0.768    0.788    0.571 
## 
## $EN.up
## original       20       30       40       50 
##    0.005    0.005    0.005    0.005    0.005 
## 
## $EN.down
## original       20       30       40       50 
##    1.000    0.919    0.828    0.773    0.663 
## 
## $EN.combined
## original       20       30       40       50 
##    0.950    0.863    0.803    0.730    0.590 
## 
## $wEN.up
## original       20       30       40       50 
##    0.005    0.005    0.005    0.005    0.005 
## 
## $wEN.down
## original       20       30       40       50 
##    1.000    0.938    0.906    0.820    0.768 
## 
## $wEN.combined
## original       20       30       40       50 
##    1.000    0.948    0.887    0.789    0.757 
## 
## $michael.up
## original       20       30       40       50 
##    0.005    0.005    0.005    0.005    0.005 
## 
## $michael.down
## original       20       30       40       50 
##    1.000    0.917    0.840    0.770    0.669 
## 
## $michael.combined
## original       20       30       40       50 
##    1.000    0.917    0.865    0.835    0.752 
## 
## $MW
## original       20       30       40       50 
##    0.939    0.841    0.779    0.677    0.558 
## 
## $KS
## original       20       30       40       50 
##    0.847    0.738    0.708    0.632    0.534 
## 
## $KS2
## original       20       30       40       50 
##    0.958    0.919    0.926    0.781    0.715 
## 
## $GSEA
## original       20       30       40       50 
##    0.832    0.779    0.732    0.668    0.701 
## 
## $GSEAmodified
## original       20       30       40       50 
##    0.704    0.603    0.541    0.511    0.474 
## 
## $modSites
## original       20       30       40       50 
##    0.938    0.896    0.830    0.772    0.699 
## 
## $modScore
## original       20       30       40       50 
##    0.943    0.919    0.897    0.837    0.737

TP ratio at FDR 0.05 over methods

## $aREAmir
## original       20       30       40       50 
##    0.875    0.750    0.750    0.604    0.417 
## 
## $EN.up
## original       20       30       40       50 
##    0.125    0.042    0.021    0.021    0.042 
## 
## $EN.down
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $EN.combined
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $wEN.up
## original       20       30       40       50 
##        0        0        0        0        0 
## 
## $wEN.down
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $wEN.combined
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $michael.up
## original       20       30       40       50 
##        0        0        0        0        0 
## 
## $michael.down
## original       20       30       40       50 
##    1.000    1.000    1.000    0.958    0.875 
## 
## $michael.combined
## original       20       30       40       50 
##    1.000    1.000    1.000    0.958    0.917 
## 
## $MW
## original       20       30       40       50 
##    1.000    1.000    1.000    1.000    0.896 
## 
## $KS
## original       20       30       40       50 
##    1.000    1.000    1.000    1.000    0.917 
## 
## $KS2
## original       20       30       40       50 
##    0.938    0.917    0.896    0.771    0.688 
## 
## $GSEA
## original       20       30       40       50 
##    0.125    0.125    0.125    0.125    0.125 
## 
## $GSEAmodified
## original       20       30       40       50 
##    0.438    0.438    0.438    0.438    0.479 
## 
## $modSites
## original       20       30       40       50 
##    0.938    0.938    0.917    0.896    0.875 
## 
## $modScore
## original       20       30       40       50 
##    1.000    1.000    0.958    0.979    0.854